Previous script: “06_get_eigengene_QTL.Rmd”
The goal is to find QTL peaks for the WGCNA eigen genes and see if those overalp with any growth QTL. We are only focusing on eigen genes that correlated with some growth traits/paramters.
library(GenomicRanges)
library(qtl)
library(tidyverse)
library(stringr)
load("../output/scanone-eigengene-qtl_2012.RData")
threshold.95 <- tibble(perm.threshold=lod.thrs[5,],
trait=colnames(lod.thrs))
scanone.gather <- scanone_eigen %>%
gather(key = trait, value = LOD, -chr, -pos) %>%
mutate(condition=str_sub(trait,1,2), color=str_sub(trait,6,100)) %>%
left_join(threshold.95)
Joining, by = "trait"
scanone.gather
rr pl.UN <- scanone.gather %>% filter(condition==) %>% ggplot(aes(x=pos,y=LOD)) + geom_line() + geom_hline(aes(yintercept=perm.threshold),lty=2,lwd=.5,alpha=.5) + facet_grid(trait ~ chr, scales=) + theme(strip.text.y = element_text(angle=0), axis.text.x = element_text(angle=90)) + ggtitle(Eigen Gene QTL) pl.UN ggsave(../output/eigen gene eQTL UN 2012.pdf,width=12,height=8)
For each eigen gene, find QTL borders and look for overlap with growth QTL
For each eigen gene first identify chromosomes with “significant” peaks (in this case > 99% permuation threshold) and then run bayesint() on them to define the intervals
sig.chrs <- scanone.gather %>% filter(LOD > perm.threshold) %>%
group_by(trait,chr) %>%
summarize(unique(chr))
sig.chrs
now for each significant chromosome/trait combo run bayesint
scanone_eigen.phys <- scanone_eigen[!str_detect(rownames(scanone_eigen),"^cA"),]
bayesint.list <- apply(sig.chrs,1,function(hit) {
result <- bayesint(scanone_eigen.phys[c("chr","pos",hit["trait"])],
chr=hit["chr"],
lodcolumn = 1,
expandtomarkers = TRUE
)
colnames(result)[3] <- "LOD"
result
})
names(bayesint.list) <- sig.chrs$trait
bayesint.list <- lapply(bayesint.list,function(x) x %>%
as.data.frame() %>%
rownames_to_column(var="markername") %>%
mutate(chr=as.character(chr))
)
bayesint.result <- as.tibble(bind_rows(bayesint.list,.id="trait")) %>%
select(trait,chr,pos,markername,LOD) %>%
separate(markername,into=c("chr1","Mbp"),sep="x", convert=TRUE) %>%
group_by(trait,chr) %>%
summarize(start=min(Mbp),end=max(Mbp),min_eQTL_LOD=min(LOD),max_eQTL_LOD=max(LOD)) %>%
#for the high QTL peaks the interval width is 0. That is overly precise and need to widen those.
mutate(start=ifelse(start==end,max(0,start-20000),start), end=ifelse(start==end,end+20000,end))
bayesint.result
Load annotation
BrapaAnnotation <- read_csv("../input/Brapa_V1.5_annotated.csv")
Missing column names filled in: 'X1' [1]Parsed with column specification:
cols(
X1 = col_integer(),
name = col_character(),
chrom = col_character(),
start = col_integer(),
end = col_integer(),
subject = col_character(),
AGI = col_character(),
At_symbol = col_character(),
At_description = col_character(),
perc_ID = col_double(),
aln_length = col_integer(),
mismatch = col_integer(),
gap_open = col_integer(),
qstart = col_integer(),
qend = col_integer(),
sstart = col_integer(),
send = col_integer(),
eval = col_double(),
score = col_double()
)
BrapaAnnotation
eigen.annotated <- lapply(1:nrow(bayesint.result),function(row) {
qtl <- bayesint.result[row,]
results <- subset(BrapaAnnotation, chrom==qtl$chr &
start >= qtl$start &
end <= qtl$end)
}
)
names(eigen.annotated) <- bayesint.result$trait
eigen.annotated <- bind_rows(eigen.annotated,.id="trait") %>%
mutate(chrom=as.character(chrom)) %>%
left_join(bayesint.result,by=c("trait","chrom"="chr")) %>% #get eQTL LOD
rename(eigen_eQTL_candidate=name)
eigen.annotated.small <- eigen.annotated %>% select(trait,eigen_eQTL_candidate,ends_with("LOD"))
eigen.annotated.small
write_csv(eigen.annotated.small,
path=str_c("../output/", filebase, "_eigenQTL_ALL_", Sys.Date(), ".csv"))
given bayesint results, find overlaps with UN growth QTL
filepath <- "../input/All2012HeightQTL2.xlsx"
filebase <- filepath %>% basename() %>% str_replace("\\..*$","")
QTLgenes <- readxl::read_excel(filepath)[,-1]
QTLgenes <- QTLgenes %>% dplyr::rename(.id=QTL, FVTtrait=FVT) # change names to match previous file
QTLgenes <- QTLgenes %>% filter(str_detect(FVTtrait,"^UN"))
QTLgenes
rr eigen.qtl.combined <- inner_join(eigen.annotated.small,QTLgenes,by=c(_eQTL_candidate=)) %>% select(.id, trait, everything()) eigen.qtl.combined
how many QTL have at least some overlap?
rr sort(unique(QTLgenes$.id))
[1] \QTL1\ \QTL12\ \QTL13\ \QTL14\ \QTL15\ \QTL16\ \QTL17\ \QTL18\ \QTL19\ \QTL2\ \QTL3\ \QTL33\
[13] \QTL34\ \QTL35\ \QTL6\ \QTL7\
rr sort(unique(eigen.qtl.combined$.id))
[1] \QTL1\ \QTL13\ \QTL14\ \QTL19\ \QTL3\ \QTL35\ \QTL6\ \QTL7\
three of four
are all eigen genes overlapping?
rr unique(eigen.annotated.small$trait)
[1] \UN_MEblue\ \UN_MEbrown\ \UN_MEcyan\ \UN_MEdarkslateblue\
[5] \UN_MElightgreen\ \UN_MEmidnightblue\ \UN_MEpurple\ \UN_MEsteelblue\
[9] \UN_MEturquoise\ \UN_MEyellow\ \UN_MEyellowgreen\
rr unique(eigen.qtl.combined$trait)
[1] \UN_MEblue\ \UN_MEbrown\ \UN_MEcyan\ \UN_MEdarkslateblue\
[5] \UN_MEmidnightblue\ \UN_MEpurple\ \UN_MEturquoise\ \UN_MEyellow\
[9] \UN_MEyellowgreen\
No, 7 of 11
rr write_csv(eigen.qtl.combined, path=str_c(../output/, filebase, _eigenQTL_overlap_, Sys.Date(), .csv))
first convert things to ranges
rr qtl.info <- QTLgenes %>% group_by(.id) %>% summarize(chrom=unique(chrom),start=min(start),end=max(end)) qtl.info r qtl.ranges <- GRanges(seqnames = qtl.info\(chrom,ranges=IRanges(start=qtl.info\)start,end=qtl.info$end)) qtl.ranges <- GenomicRanges::reduce(qtl.ranges)
rr eQTL.ranges <- GRanges(bayesint.result\(chr, ranges = IRanges(start=bayesint.result\)start, end=bayesint.result$end)) eQTL.ranges <- GenomicRanges::reduce(eQTL.ranges)
Make table of chromosome info
rr chr.info <- scanone_eigen.phys %>% as.data.frame() %>% rownames_to_column() %>% select(marker) %>% separate(marker,into=c(,),sep=,convert=TRUE) %>% group_by(chr) %>% summarize(start=min(bp),end=max(bp))
Do the simulations
rr sims <- 1000 set.seed(2323) sim.results <- sapply(1:sims, function(s) { if (s %% 100 == 0) print(s) sim.eQTL <- tibble( chr=sample(chr.info\(chr, size = length(eQTL.ranges), replace = TRUE, prob=chr.info\)end/sum(chr.info\(end)), width=width(eQTL.ranges) # width of the QTL to simulate ) sim.eQTL <- chr.info %>% select(chr,chr.start=start,chr.end=end) %>% right_join(sim.eQTL,by=\chr\) #need to get the chrom end so we can sample correctly sim.eQTL <- sim.eQTL %>% mutate(qtl.start = runif(n=n(), min = chr.start, max= max(chr.start,chr.end-width)), qtl.end=qtl.start+width) sim.eQTL.ranges <- GRanges(seqnames = sim.eQTL\)chr, ranges = IRanges(start=sim.eQTL\(qtl.start, end=sim.eQTL\)qtl.end))
suppressWarnings(result <- sum(countOverlaps(qtl.ranges,sim.eQTL.ranges)>0)) result })
[1] 100
[1] 200
[1] 300
[1] 400
[1] 500
[1] 600
[1] 700
[1] 800
[1] 900
[1] 1000
rr true.overlap <- sum(countOverlaps(qtl.ranges,eQTL.ranges)) #OK to ignore warnings
Each of the 2 combined objects has sequence levels not in the other:
- in 'x': A09, A05
- in 'y': A08
Make sure to always combine/compare objects based on the same reference
genome (use suppressWarnings() to suppress this warning).
rr true.overlap
[1] 4
rr mean(sim.results >= true.overlap)
[1] 0.065
rr tibble(FVTQTL_vs_MReQTL_True_Overlaps=true.overlap, N_Simulations_fewer_overlaps=sum(sim.results < true.overlap), N_Simulations_greater_equal_overlaps=sum(sim.results >= true.overlap), P_value=mean(sim.results >= true.overlap) ) %>% write_csv(str_c(../output/, filebase, _WGCNA_eigen_eQTL_scanone_overlap_pval_, Sys.Date(), .csv))
rr threshold.95 <- tibble(perm.threshold=lod.thrs.cim[5,], trait=colnames(lod.thrs.cim)) scanone.gather <- scanone_eigen_cim %>% gather(key = trait, value = LOD, -chr, -pos) %>% mutate(condition=str_sub(trait,1,2), color=str_sub(trait,6,100)) %>% left_join(threshold.95)
Joining, by = \trait\
rr scanone.gather
rr pl.UN <- scanone.gather %>% filter(condition==) %>% ggplot(aes(x=pos,y=LOD)) + geom_line() + geom_hline(aes(yintercept=perm.threshold),lty=2,lwd=.5,alpha=.5) + facet_grid(trait ~ chr, scales=) + theme(strip.text.y = element_text(angle=0), axis.text.x = element_text(angle=90)) + ggtitle(Eigen Gene QTL) pl.UN ggsave(../output/eigen gene eQTL UN CIM 2012.pdf,width=12,height=8)
For each eigen gene, find QTL borders and look for overlap with growth QTL
For each eigen gene first identify chromosomes with “significant” peaks (in this case > 99% permuation threshold) and then runs bayesint() on them to define the intervals
rr sig.chrs <- scanone.gather %>% filter(LOD > perm.threshold) %>% group_by(trait,chr) %>% summarize(unique(chr)) sig.chrs
now for each significant chromosome/trait combo run bayesint
rr #remove markers without physical position scanone_eigen_cim.phys <- scanone_eigen_cim[!str_detect(rownames(scanone_eigen),^cA),] bayesint.list <- apply(sig.chrs,1,function(hit) { result <- bayesint(scanone_eigen_cim.phys[c(,,hit[])], chr=hit[], lodcolumn = 1, expandtomarkers = TRUE ) colnames(result)[3] <-
result }) names(bayesint.list) <- sig.chrs$trait bayesint.list <- lapply(bayesint.list,function(x) x %>% as.data.frame() %>% rownames_to_column(var=) %>% mutate(chr=as.character(chr)) ) bayesint.result <- as.tibble(bind_rows(bayesint.list,.id=)) %>% select(trait,chr,pos,markername,LOD) %>% separate(markername,into=c(1,),sep=, convert=TRUE) %>% group_by(trait,chr) %>% summarize(start=min(Mbp),end=max(Mbp),min_eQTL_LOD=min(LOD),max_eQTL_LOD=max(LOD)) %>% #for the high QTL peaks the interval width is 0. That is overly precise and need to widen those. mutate(start=ifelse(start==end,max(0,start-20000),start), end=ifelse(start==end,end+20000,end))
bayesint.result
Load annotation
rr BrapaAnnotation <- read_csv(../input/Brapa_V1.5_annotated.csv)
Missing column names filled in: 'X1' [1]Parsed with column specification:
cols(
X1 = col_integer(),
name = col_character(),
chrom = col_character(),
start = col_integer(),
end = col_integer(),
subject = col_character(),
AGI = col_character(),
At_symbol = col_character(),
At_description = col_character(),
perc_ID = col_double(),
aln_length = col_integer(),
mismatch = col_integer(),
gap_open = col_integer(),
qstart = col_integer(),
qend = col_integer(),
sstart = col_integer(),
send = col_integer(),
eval = col_double(),
score = col_double()
)
|============ | 13%
|============= | 14% 1 MB
|============== | 15% 1 MB
|=============== | 16% 1 MB
|================ | 17% 1 MB
|================= | 19% 1 MB
|================== | 20% 1 MB
|=================== | 21% 1 MB
|==================== | 22% 1 MB
|===================== | 24% 1 MB
|======================= | 25% 1 MB
|======================== | 26% 1 MB
|========================= | 27% 2 MB
|========================== | 29% 2 MB
|=========================== | 30% 2 MB
|============================ | 31% 2 MB
|============================= | 32% 2 MB
|=============================== | 34% 2 MB
|================================ | 35% 2 MB
|================================= | 36% 2 MB
|================================== | 37% 2 MB
|=================================== | 38% 2 MB
|==================================== | 40% 2 MB
|===================================== | 41% 3 MB
|====================================== | 42% 3 MB
|======================================= | 43% 3 MB
|======================================== | 44% 3 MB
|========================================== | 46% 3 MB
|=========================================== | 47% 3 MB
|============================================ | 48% 3 MB
|============================================= | 49% 3 MB
|============================================== | 51% 3 MB
|=============================================== | 52% 3 MB
|================================================ | 53% 3 MB
|================================================= | 54% 4 MB
|================================================== | 55% 4 MB
|=================================================== | 57% 4 MB
|===================================================== | 58% 4 MB
|====================================================== | 59% 4 MB
|======================================================= | 60% 4 MB
|======================================================== | 61% 4 MB
|========================================================= | 62% 4 MB
|========================================================== | 64% 4 MB
|=========================================================== | 65% 4 MB
|============================================================ | 66% 4 MB
|============================================================= | 67% 5 MB
|============================================================== | 69% 5 MB
|=============================================================== | 70% 5 MB
|================================================================ | 71% 5 MB
|================================================================== | 72% 5 MB
|=================================================================== | 73% 5 MB
|==================================================================== | 75% 5 MB
|===================================================================== | 76% 5 MB
|====================================================================== | 77% 5 MB
|======================================================================= | 78% 5 MB
|======================================================================== | 80% 5 MB
|========================================================================= | 81% 6 MB
|========================================================================== | 82% 6 MB
|============================================================================ | 83% 6 MB
|============================================================================= | 84% 6 MB
|============================================================================== | 86% 6 MB
|=============================================================================== | 87% 6 MB
|================================================================================ | 88% 6 MB
|================================================================================= | 89% 6 MB
|================================================================================== | 90% 6 MB
|=================================================================================== | 92% 6 MB
|===================================================================================== | 93% 6 MB
|====================================================================================== | 94% 7 MB
|======================================================================================= | 95% 7 MB
|======================================================================================== | 97% 7 MB
|========================================================================================= | 98% 7 MB
|==========================================================================================| 99% 7 MB
|===========================================================================================| 100% 7 MB
rr BrapaAnnotation
rr eigen.annotated <- lapply(1:nrow(bayesint.result),function(row) { qtl <- bayesint.result[row,] results <- subset(BrapaAnnotation, chrom==qtl\(chr & start >= qtl\)start & end <= qtl\(end) } ) names(eigen.annotated) <- bayesint.result\)trait eigen.annotated <- bind_rows(eigen.annotated,.id=) %>% mutate(chrom=as.character(chrom)) %>% left_join(bayesint.result,by=c(,=)) %>% #get eQTL LOD rename(eigen_eQTL_candidate=name) eigen.annotated.small <- eigen.annotated %>% select(trait,eigen_eQTL_candidate,ends_with()) eigen.annotated.small
given bayesint results, find overlaps with UN growth QTL
rr filepath <- ../input/All2012HeightQTL2.xlsx
filebase <- filepath %>% basename() %>% str_replace(\..*$,\) QTLgenes <- readxl::read_excel(filepath)[,-1] QTLgenes <- QTLgenes %>% dplyr::rename(.id=QTL, FVTtrait=FVT) # change names to match previous file QTLgenes <- QTLgenes %>% filter(str_detect(FVTtrait,^UN)) QTLgenes
rr eigen.qtl.combined <- inner_join(eigen.annotated.small,QTLgenes,by=c(_eQTL_candidate=)) %>% select(.id, trait, everything()) eigen.qtl.combined
how many QTL have at least some overlap?
rr unique(QTLgenes$.id)
[1] \QTL1\ \QTL12\ \QTL13\ \QTL14\ \QTL15\ \QTL16\ \QTL17\ \QTL18\ \QTL19\ \QTL2\ \QTL3\ \QTL33\
[13] \QTL34\ \QTL35\ \QTL6\ \QTL7\
rr unique(eigen.qtl.combined$.id)
[1] \QTL3\ \QTL7\ \QTL19\ \QTL1\ \QTL13\ \QTL6\
two of four
are all eigen genes overlapping?
rr unique(eigen.annotated.small$trait)
[1] \UN_MEblue\ \UN_MEbrown\ \UN_MEcyan\ \UN_MEdarkslateblue\
[5] \UN_MElightgreen\ \UN_MEmidnightblue\ \UN_MEsteelblue\ \UN_MEturquoise\
[9] \UN_MEyellowgreen\
rr unique(eigen.qtl.combined$trait)
[1] \UN_MEbrown\ \UN_MEcyan\ \UN_MEdarkslateblue\ \UN_MEmidnightblue\
[5] \UN_MEturquoise\ \UN_MEyellowgreen\
No, 2
rr write_csv(eigen.qtl.combined, path=str_c(../output/, filebase, _eigenQTL_overlap_CIM_, Sys.Date(), .csv))
rr eQTL.ranges <- GRanges(bayesint.result\(chr, ranges = IRanges(start=bayesint.result\)start, end=bayesint.result$end)) eQTL.ranges <- GenomicRanges::reduce(eQTL.ranges)
Do the simulations
rr sims <- 1000 set.seed(4545) sim.results <- sapply(1:sims, function(s) { if (s %% 100 == 0) print(s) sim.eQTL <- tibble( chr=sample(chr.info\(chr, size = length(eQTL.ranges), replace = TRUE, prob=chr.info\)end/sum(chr.info\(end)), width=width(eQTL.ranges) # width of the QTL to simulate ) sim.eQTL <- chr.info %>% select(chr,chr.start=start,chr.end=end) %>% right_join(sim.eQTL,by=\chr\) #need to get the chrom end so we can sample correctly sim.eQTL <- sim.eQTL %>% mutate(qtl.start = runif(n=n(), min = chr.start, max= max(chr.start,chr.end-width)), qtl.end=qtl.start+width) sim.eQTL.ranges <- GRanges(seqnames = sim.eQTL\)chr, ranges = IRanges(start=sim.eQTL\(qtl.start, end=sim.eQTL\)qtl.end))
suppressWarnings(result <- sum(countOverlaps(qtl.ranges,sim.eQTL.ranges)>0)) result })
[1] 100
[1] 200
[1] 300
[1] 400
[1] 500
[1] 600
[1] 700
[1] 800
[1] 900
[1] 1000
rr true.overlap <- sum(countOverlaps(qtl.ranges,eQTL.ranges)) #OK to ignore warnings true.overlap
[1] 4
rr mean(sim.results >= true.overlap)
[1] 0.005
rr tibble(FVTQTL_vs_MReQTL_True_Overlaps=true.overlap, N_Simulations_fewer_overlaps=sum(sim.results < true.overlap), N_Simulations_greater_equal_overlaps=sum(sim.results >= true.overlap), P_value=mean(sim.results >= true.overlap) ) %>% write_csv(str_c(../output/, filebase, _WGCNA_eigen_eQTL_CIM_overlap_pval_, Sys.Date(), .csv))